In [3]:
from IPython.display import IFrame
IFrame('http://pandas.pydata.org/', width=1000, height=350)
Out[3]:
In [4]:
import pandas as pd
import numpy as np
In [5]:
df_txt = pd.read_csv("data/medidas.txt", header=None, parse_dates=[1], keep_date_col=True)
#df_txt = pd.read_csv('data/all_test.txt', sep='\t', header=None)
df_txt
Out[5]:
0
1
2
0
8
2013-11-28 11:00:00
0
1
15
2013-11-28 11:00:00
692
2
16
2013-11-28 11:00:00
367
3
17
2013-11-28 11:00:00
0
4
18
2013-11-28 11:00:00
0
5
20
2013-12-01 09:30:00
71
6
6
2013-12-05 15:45:00
0
7
5
2013-11-18 22:15:00
29705
8
6
2013-11-18 22:15:00
15364
9
7
2013-11-18 22:15:00
41482
10
8
2013-11-18 22:15:00
52487
11
9
2013-11-18 22:15:00
0
12
10
2013-11-18 22:15:00
0
13
13
2013-11-18 22:15:00
22792
14
14
2013-11-18 22:15:00
18182
15
15
2013-11-18 22:15:00
0
16
16
2013-11-18 22:15:00
0
17
17
2013-11-18 22:15:00
22273
18
18
2013-11-18 22:15:00
31488
19
7
2013-11-18 22:30:00
64266
20
8
2013-11-18 22:30:00
41479
21
9
2013-11-18 22:30:00
0
22
10
2013-11-18 22:30:00
0
23
13
2013-11-18 22:30:00
14344
24
14
2013-11-18 22:30:00
11270
25
15
2013-11-18 22:30:00
0
26
16
2013-11-18 22:30:00
0
27
17
2013-11-18 22:30:00
20481
28
18
2013-11-18 22:30:00
29696
29
5
2013-11-18 22:30:00
24585
...
...
...
...
129691
20
2014-02-09 12:45:00
105
129692
12
2014-02-09 11:30:00
0
129693
9
2014-02-09 11:30:00
0
129694
10
2014-02-09 11:30:00
0
129695
9
2014-02-09 11:45:00
0
129696
10
2014-02-09 11:45:00
0
129697
7
2014-02-09 12:00:00
0
129698
8
2014-02-09 12:00:00
0
129699
5
2014-02-09 12:00:00
0
129700
6
2014-02-09 12:00:00
0
129701
15
2014-02-09 12:00:00
598
129702
16
2014-02-09 12:00:00
304
129703
19
2014-02-09 12:00:00
187
129704
20
2014-02-09 12:00:00
98
129705
17
2014-02-09 12:00:00
0
129706
18
2014-02-09 12:00:00
0
129707
9
2014-02-09 12:15:00
0
129708
10
2014-02-09 12:15:00
0
129709
5
2014-02-09 12:30:00
0
129710
6
2014-02-09 12:30:00
0
129711
15
2014-02-09 12:30:00
616
129712
16
2014-02-09 12:30:00
309
129713
19
2014-02-09 12:30:00
199
129714
20
2014-02-09 12:30:00
104
129715
17
2014-02-09 12:30:00
0
129716
18
2014-02-09 12:30:00
0
129717
11
2014-02-09 12:30:00
0
129718
12
2014-02-09 12:30:00
0
129719
23
2014-02-09 12:45:00
338
129720
24
2014-02-09 12:45:00
227
129721 rows × 3 columns
In [6]:
df_txt.shape
Out[6]:
(129721, 3)
In [7]:
df_other_excel = pd.read_excel('data/test.xls', 'Sheet0') # ver que sucede cuando ponemos header None
#print df_other_excel.columns
df_other_excel['Latitud'].apply(lambda x: x.replace(',', '.'))
Out[7]:
0 -38.73008
1 -38.70583
2 -38.74588
3 -38.71933
4 -38.71498
5 -38.72875
6 -38.74818
7 -38.72875
8 -38.71425
9 -38.69112
10 -38.70475
11 -38.75256
12 -38.70533
13 -38.6940002441406
14 -38.67708
15 -38.68735
16 -38.71095
17 -38.71096
18 -38.72744
19 -38.69223
20 -38.74731
21 -38.75501
22 -38.72717
23 -38.70318
24 -38.71692
25 -38.73853
26 -38.70213
27 -38.72409
28 -38.75764
29 -38.7292
...
6330 -38.70954
6331 -38.71064
6332 -38.78465
6333 -38.70511
6334 -38.72084
6335 -38.70983
6336 -38.69252
6337 -38.7248
6338 -38.71496
6339 -38.73507
6340 -38.69466
6341 -38.70844
6342 -38.73331
6343 -38.70589
6344 -38.72077
6345 -38.71576
6346 -38.77082
6347 -38.71301
6348 -38.73957
6349 -38.77046
6350 -38.70701
6351 -38.71933
6352 -38.75234
6353 -38.71004
6354 -38.74628
6355 -38.7385559082031
6356 -38.72046
6357 -38.71361
6358 -38.71211
6359 -38.71972
Name: Latitud, dtype: object
In [8]:
df_other_excel.shape
Out[8]:
(6360, 11)
In [9]:
df_html = pd.read_html('data/lista_landmark.html')
df_html
Out[9]:
[ 0 1 2
0 Type Value Text
1 NaN NaN Fiducials List
2 NaN 6.2, 128.7, -580.1 Espina nasal anterior
3 NaN 7.2, 154.8, -570.5 Infraposnasal
4 NaN 10.4, 121.4, -576.3 Nariale R
5 NaN 2.9, 121.9, -578.2 Nariale L
6 NaN 18.5, 118.8, -565.0 Alare oseo R
7 NaN -6.4, 120.5, -566.9 Alare oseo L
8 NaN 25.8, 132.4, -575.0 Alare blando R
9 NaN -11.3, 136.4, -575.0 Alare blando L
10 NaN 17.5, 117.2, -559.5 Concha nasal anterior R
11 NaN -5.4, 115.9, -563.7 Concha nasal anterior L
12 NaN 8.9, 127.9, -575.0 Subalare medial R
13 NaN 2.1, 127.0, -575.0 Subalare medial L
14 NaN 20.6, 131.0, -575.0 Subalare lateral R
15 NaN -5.7, 134.8, -575.0 Subalare lateral L
16 NaN 11.4, 148.9, -575.0 Subalare anterior R
17 NaN 4.4, 149.5, -575.0 Subalare anterior L
18 NaN 5.3, 140.4, -544.7 Rhinion
19 NaN 5.3, 128.4, -522.5 Nasion
20 NaN 15.3, 125.5, -524.4 Nasomaxilar R
21 NaN -3.5, 126.1, -524.0 Nasomaxilar L
22 NaN 16.3, 76.8, -527.8 Canal optico R
23 NaN -8.3, 82.8, -527.7 Canal optico L
24 NaN 7.0, 67.3, -557.3 Hormion
25 NaN 6.9, 77.4, -572.8 Espina nasal posterior
26 NaN 17.0, 82.3, -574.8 Coana anterior R
27 NaN -2.3, 81.9, -574.8 Coana anterior L
28 NaN 15.1, 65.5, -552.1 Coana posterior R
29 NaN -1.6, 62.6, -553.1 Coana posterir L
30 NaN -8.2, 72.0, -567.1 Coana lateral R
31 NaN 21.9, 72.3, -566.0 Coana lateral L
32 NaN 6.8, 72.4, -566.0 Coana posterior R
33 NaN 6.2, 71.8, -567.0 Coana posterior L
34 NaN -6.5, 79.1, -561.9 Concha nasal posterior R
35 NaN 21.0, 78.1, -561.0 Concha nasal posterior L
36 NaN 14.9, 128.6, -552.7 Nasal inferior R
37 NaN -3.9, 128.6, -555.0 Nasal inferior L
38 NaN NaN Ruler List
39 NaN 51.4 [mm] Longitud total
40 NaN 0.0 [mm] Longtud media
41 NaN 6.9, 102.6, -579.4 Plano B
42 NaN 6.6, 102.6, -577.3 Tabique inferior
43 NaN 22.9, 102.6, -575.7 Piso lateral R
44 NaN -12.5, 102.6, -575.2 Piso lateral L
45 NaN 26.3, 102.6, -561.3 Margen medio lateral R
46 NaN -12.4, 102.6, -563.4 Margen medio lateral L
47 NaN 6.1, 102.6, -527.5 Tabique medio
48 NaN 19.5, 77.6, -572.4 Piso posterior lateral R
49 NaN -6.9, 77.6, -572.3 Piso posterior lateral L
50 NaN 21.3, 77.6, -561.4 Margen posterior lateral R
51 NaN -7.4, 77.6, -562.0 Margen posterior lateral L
52 NaN 6.6, 77.6, -547.6 Tabique posterior]
In [ ]:
df_ = pd.read_stata?
In [ ]:
df_ = pd.read_hdf?
In [ ]:
#df_tx.to_excel('data/test_out.xls', header=False, index=False) # cuidado con límites de planillas de excel!
In [ ]:
df_excel.to_clipboard?
In [10]:
mi_serie = pd.Series([3.4, 56.3, 56.1])
print mi_serie.values
print mi_serie.index
mi_serie
[ 3.4 56.3 56.1]
Int64Index([0, 1, 2], dtype='int64')
Out[10]:
0 3.4
1 56.3
2 56.1
dtype: float64
In [11]:
mi_serie = pd.Series([3.4, 56.3, 56.1], index=['dist_1', 'dist_2', 'dist_3'])
print mi_serie.values
print mi_serie.index
print mi_serie['dist_2']
print mi_serie[['dist_1', 'dist_3']]
mi_serie
[ 3.4 56.3 56.1]
Index([u'dist_1', u'dist_2', u'dist_3'], dtype='object')
56.3
dist_1 3.4
dist_3 56.1
dtype: float64
Out[11]:
dist_1 3.4
dist_2 56.3
dist_3 56.1
dtype: float64
In [12]:
mi_serie[mi_serie > 4]
Out[12]:
dist_2 56.3
dist_3 56.1
dtype: float64
In [13]:
mi_serie * 2
Out[13]:
dist_1 6.8
dist_2 112.6
dist_3 112.2
dtype: float64
In [14]:
np.sqrt(mi_serie)
Out[14]:
dist_1 1.843909
dist_2 7.503333
dist_3 7.489993
dtype: float64
In [15]:
pd.notnull(mi_serie)
Out[15]:
dist_1 True
dist_2 True
dist_3 True
dtype: bool
In [16]:
pd.isnull(mi_serie)
Out[16]:
dist_1 False
dist_2 False
dist_3 False
dtype: bool
In [17]:
dates_complete = pd.date_range('1/18/2013', '02/09/2014', freq='15T')
other_dates = pd.date_range('1/18/2013', '02/09/2014', freq='1D')
print dates_complete
print other_dates
mi_serie = pd.Series(other_dates)
mi_serie_med = pd.Series(dates_complete)
DatetimeIndex(['2013-01-18 00:00:00', '2013-01-18 00:15:00',
'2013-01-18 00:30:00', '2013-01-18 00:45:00',
'2013-01-18 01:00:00', '2013-01-18 01:15:00',
'2013-01-18 01:30:00', '2013-01-18 01:45:00',
'2013-01-18 02:00:00', '2013-01-18 02:15:00',
...
'2014-02-08 21:45:00', '2014-02-08 22:00:00',
'2014-02-08 22:15:00', '2014-02-08 22:30:00',
'2014-02-08 22:45:00', '2014-02-08 23:00:00',
'2014-02-08 23:15:00', '2014-02-08 23:30:00',
'2014-02-08 23:45:00', '2014-02-09 00:00:00'],
dtype='datetime64[ns]', length=37153, freq='15T')
DatetimeIndex(['2013-01-18', '2013-01-19', '2013-01-20', '2013-01-21',
'2013-01-22', '2013-01-23', '2013-01-24', '2013-01-25',
'2013-01-26', '2013-01-27',
...
'2014-01-31', '2014-02-01', '2014-02-02', '2014-02-03',
'2014-02-04', '2014-02-05', '2014-02-06', '2014-02-07',
'2014-02-08', '2014-02-09'],
dtype='datetime64[ns]', length=388, freq='D')
In [18]:
type(df_other_excel)
Out[18]:
pandas.core.frame.DataFrame
In [19]:
df_other_excel
Out[19]:
Fecha
Evento
Domicilio
Localidad
Latitud
Longitud
Cantidad Ambulancias
Ambulancia
Derivación
Atenciones en Lugar
Derivaciones a Hospital
0
2014-01-01 00:12:59
ATENCION MEDICA EN DOMICILIOS
NEWTON 1800
BAHIA BLANCA
-38,73008
-62,23579
NaN
NaN
NaN
NaN
NaN
1
2014-01-01 02:16:00
ACCIDENTE DE TRANSITO
TERRADA esq. 17 DE MAYO
BAHIA BLANCA
-38,70583
-62,29642
NaN
NaN
NaN
NaN
NaN
2
2014-01-01 03:08:40
ACCIDENTE DE TRANSITO
CASEROS 3300
BAHIA BLANCA
-38,74588
-62,22757
NaN
NaN
NaN
NaN
NaN
3
2014-01-01 03:10:06
ATENCION MEDICA EN VIA PUBLICA
CATAMARCA 1500
BAHIA BLANCA
-38,71933
-62,30212
NaN
NaN
NaN
NaN
NaN
4
2014-01-01 03:29:06
ACCIDENTE DE TRANSITO
DR ADRIAN MORADO VERES esq. JUJUY
BAHIA BLANCA
-38,71498
-62,31009
NaN
NaN
NaN
NaN
NaN
5
2014-01-01 03:56:44
ATENCION MEDICA EN DOMICILIOS
MATHEU 6000
BAHIA BLANCA
-38,72875
-62,23798
NaN
NaN
NaN
NaN
NaN
6
2014-01-01 04:12:58
ATENCION MEDICA EN VIA PUBLICA
CNEL VIDAL 9000
BAHIA BLANCA
-38,74818
-62,28333
NaN
NaN
NaN
NaN
NaN
7
2014-01-01 04:19:14
ATENCION MEDICA EN DOMICILIOS
MATHEU 6000
BAHIA BLANCA
-38,72875
-62,23798
NaN
NaN
NaN
NaN
NaN
8
2014-01-01 04:21:42
ATENCION MEDICA EN VIA PUBLICA
CORONEL MARTINIANO RODRIGUEZ esq. MITRE
BAHIA BLANCA
-38,71425
-62,26583
NaN
NaN
NaN
NaN
NaN
9
2014-01-01 05:01:05
ATENCION MEDICA EN VIA PUBLICA
LEOPOLDO LUGONES 0
BAHIA BLANCA
-38,69112
-62,3053
NaN
NaN
NaN
NaN
NaN
10
2014-01-01 05:11:52
ATENCION MEDICA EN VIA PUBLICA
PBRO JUAN BAUTISTA BIGIO 9000
BAHIA BLANCA
-38,70475
-62,31227
NaN
NaN
NaN
NaN
NaN
11
2014-01-01 05:18:04
ATENCION MEDICA EN VIA PUBLICA
RIO NEGRO 2400
BAHIA BLANCA
-38,75256
-62,27807
NaN
NaN
NaN
NaN
NaN
12
2014-01-01 06:54:26
ATENCION MEDICA EN VIA PUBLICA
FUERTE ARGENTINO 3000
BAHIA BLANCA
-38,70533
-62,26081
NaN
NaN
NaN
NaN
NaN
13
2014-01-01 08:51:31
ACCIDENTE DE TRANSITO
CABRERA 3900
BAHIA BLANCA
-38,6940002441406
-62,2383575439453
NaN
NaN
NaN
NaN
NaN
14
2014-01-01 09:28:54
ATENCION MEDICA EN DOMICILIOS
FRANCISCA HERNANDEZ 0
BAHIA BLANCA
-38,67708
-62,32211
NaN
NaN
NaN
NaN
NaN
15
2014-01-01 10:43:52
ATENCION MEDICA EN DOMICILIOS
ESTEBAN RIGAMONTI 3000
BAHIA BLANCA
-38,68735
-62,29587
NaN
NaN
NaN
NaN
NaN
16
2014-01-01 11:07:58
ATENCION MEDICA EN DOMICILIOS
RINCON 279 1 0
BAHIA BLANCA
-38,71095
-62,25317
NaN
NaN
NaN
NaN
NaN
17
2014-01-01 11:58:29
RECLAMOS POR ARBOLES CAIDOS O RAMAS
MENDOZA 0
BAHIA BLANCA
-38,71096
-62,28166
NaN
NaN
NaN
NaN
NaN
18
2014-01-01 12:38:07
ATENCION MEDICA EN DOMICILIOS
ENTRE RIOS 2100
BAHIA BLANCA
-38,72744
-62,30508
NaN
NaN
NaN
NaN
NaN
19
2014-01-01 12:51:37
ATENCION MEDICA EN VIA PUBLICA
AYACUCHO 3000
BAHIA BLANCA
-38,69223
-62,29871
NaN
NaN
NaN
NaN
NaN
20
2014-01-01 13:01:25
ATENCION MEDICA EN DOMICILIOS
TIERRA DEL FUEGO 1500
BAHIA BLANCA
-38,74731
-62,26654
NaN
NaN
NaN
NaN
NaN
21
2014-01-01 14:40:51
ATENCION MEDICA EN DOMICILIOS
GRAL NICOLAS LEVALLE 1800
BAHIA BLANCA
-38,75501
-62,26722
NaN
NaN
NaN
NaN
NaN
22
2014-01-01 15:04:30
ATENCION MEDICA EN DOMICILIOS
DON BOSCO 3000
BAHIA BLANCA
-38,72717
-62,2875
NaN
NaN
NaN
NaN
NaN
23
2014-01-01 15:52:56
ATENCION MEDICA EN VIA PUBLICA
12 DE OCTUBRE 6000
BAHIA BLANCA
-38,70318
-62,26633
NaN
NaN
NaN
NaN
NaN
24
2014-01-01 15:58:02
ATENCION MEDICA EN DOMICILIOS
GRAL PABLO RICCHIERI 2400
BAHIA BLANCA
-38,71692
-62,32015
NaN
NaN
NaN
NaN
NaN
25
2014-01-01 16:09:32
ATENCION MEDICA EN DOMICILIOS
HUAURA 3000
BAHIA BLANCA
-38,73853
-62,2314
NaN
NaN
NaN
NaN
NaN
26
2014-01-01 16:29:15
ATENCION MEDICA EN DOMICILIOS
HUMBERTO PRIMO 3000
BAHIA BLANCA
-38,70213
-62,27465
NaN
NaN
NaN
NaN
NaN
27
2014-01-01 16:49:20
ACCIDENTE DE TRANSITO
CNEL ROSALES esq. D ORBIGNY
BAHIA BLANCA
-38,72409
-62,23008
NaN
NaN
NaN
NaN
NaN
28
2014-01-01 17:13:52
ATENCION MEDICA EN DOMICILIOS
HONDURAS 2700
BAHIA BLANCA
-38,75764
-62,27384
NaN
NaN
NaN
NaN
NaN
29
2014-01-01 17:19:15
ATENCION MEDICA EN DOMICILIOS
DARREGUEIRA 6000
BAHIA BLANCA
-38,7292
-62,26293
NaN
NaN
NaN
NaN
NaN
...
...
...
...
...
...
...
...
...
...
...
...
6330
2014-05-21 09:48:48
RECLAMOS POR ARBOLES CAIDOS O RAMAS
AVDA ALEM 3000
BAHIA BLANCA
-38,70954
-62,26416
NaN
NaN
NaN
NaN
NaN
6331
2014-05-21 11:10:07
ATENCION MEDICA EN DOMICILIOS
SALTA 0
BAHIA BLANCA
-38,71064
-62,27452
NaN
NaN
NaN
NaN
NaN
6332
2014-05-21 11:33:10
ATENCION MEDICA EN DOMICILIOS
GRAL MANUEL BELGRANO 3600
ING. WHITE
-38,78465
-62,26603
NaN
NaN
NaN
NaN
NaN
6333
2014-05-21 11:46:09
ACCIDENTE DE TRANSITO
ESTOMBA esq. CORONEL MARTIN RODRIGUEZ
BAHIA BLANCA
-38,70511
-62,28356
NaN
NaN
NaN
NaN
NaN
6334
2014-05-21 12:18:34
ACCIDENTE DE TRANSITO
TUCUMAN esq. PTE ROCA
BAHIA BLANCA
-38,72084
-62,27851
NaN
NaN
NaN
NaN
NaN
6335
2014-05-21 13:07:43
ACCIDENTE DE TRANSITO
NAPOSTA esq. HIPOLITO YRIGOYEN
BAHIA BLANCA
-38,70983
-62,25637
NaN
NaN
NaN
NaN
NaN
6336
2014-05-21 13:19:25
ACCIDENTE DE TRANSITO
FLORIDA esq. GRAL ALVEAR
BAHIA BLANCA
-38,69252
-62,27166
NaN
NaN
NaN
NaN
NaN
6337
2014-05-21 13:42:09
ATENCION MEDICA EN DOMICILIOS
PJE JERONIMO CALVENTO 1800
BAHIA BLANCA
-38,7248
-62,3004
NaN
NaN
NaN
NaN
NaN
6338
2014-05-21 14:21:38
ATENCION MEDICA EN DOMICILIOS
PAMPA CENTRAL 2100
BAHIA BLANCA
-38,71496
-62,31219
NaN
NaN
NaN
NaN
NaN
6339
2014-05-21 14:40:44
ATENCION MEDICA EN DOMICILIOS
25 DE MAYO 9000
BAHIA BLANCA
-38,73507
-62,2701
NaN
NaN
NaN
NaN
NaN
6340
2014-05-21 15:35:31
ATENCION MEDICA EN DOMICILIOS
ÑANCU 0
BAHIA BLANCA
-38,69466
-62,21915
NaN
NaN
NaN
NaN
NaN
6341
2014-05-21 16:03:22
ACCIDENTE DE TRANSITO
AVDA ALEM 6000
BAHIA BLANCA
-38,70844
-62,26504
NaN
NaN
NaN
NaN
NaN
6342
2014-05-21 16:05:40
ATENCION MEDICA EN DOMICILIOS
3 DE FEBRERO 2700
BAHIA BLANCA
-38,73331
-62,22632
NaN
NaN
NaN
NaN
NaN
6343
2014-05-21 16:41:30
ACCIDENTE DE TRANSITO
VALENTIN ALSINA esq. RUFINO ROJAS
BAHIA BLANCA
-38,70589
-62,24969
NaN
NaN
NaN
NaN
NaN
6344
2014-05-21 17:28:42
ACCIDENTE DE TRANSITO
MENDOZA esq. BOLIVIA
BAHIA BLANCA
-38,72077
-62,29408
NaN
NaN
NaN
NaN
NaN
6345
2014-05-21 18:05:24
ACCIDENTE DE TRANSITO
BELGRANO 3000
BAHIA BLANCA
-38,71576
-62,2595
NaN
NaN
NaN
NaN
NaN
6346
2014-05-21 19:00:20
ATENCION MEDICA EN DOMICILIOS
MAESTRO PICCIOLI 3600
ING. WHITE
-38,77082
-62,27824
NaN
NaN
NaN
NaN
NaN
6347
2014-05-21 19:16:39
ACCIDENTE DE TRANSITO
PABLO LEJARRAGA esq. 14 DE JULIO
BAHIA BLANCA
-38,71301
-62,2291
NaN
NaN
NaN
NaN
NaN
6348
2014-05-21 20:14:04
ATENCION MEDICA EN DOMICILIOS
RIVADAVIA 2700
BAHIA BLANCA
-38,73957
-62,23365
NaN
NaN
NaN
NaN
NaN
6349
2014-05-21 20:20:29
RECLAMOS POR HUMO EN ZONA INDUSTRIAL
MAESTRO PICCIOLI 3600
ING. WHITE
-38,77046
-62,27781
NaN
NaN
NaN
NaN
NaN
6350
2014-05-21 20:45:28
RECLAMOS POR BACHES EN VIA PUBLICA
12 DE OCTUBRE esq. CARONTI
BAHIA BLANCA
-38,70701
-62,26326
NaN
NaN
NaN
NaN
NaN
6351
2014-05-21 21:32:24
ATENCION MEDICA A PARTURIENTAS
CORNELIO SAAVEDRA 3000
CERRI
-38,71933
-62,39386
NaN
NaN
NaN
NaN
NaN
6352
2014-05-21 22:03:08
RECLAMOS POR ARBOLES CAIDOS O RAMAS
RAFAEL OBLIGADO 5100
BAHIA BLANCA
-38,75234
-62,18884
NaN
NaN
NaN
NaN
NaN
6353
2014-05-22 01:05:32
ATENCION MEDICA EN DOMICILIOS
BELGRANO 9000
BAHIA BLANCA
-38,71004
-62,25299
NaN
NaN
NaN
NaN
NaN
6354
2014-05-22 02:37:26
ATENCION MEDICA EN DOMICILIOS
MARCOS MORA 3000
BAHIA BLANCA
-38,74628
-62,22436
NaN
NaN
NaN
NaN
NaN
6355
2014-05-22 06:09:10
ACCIDENTE DE TRANSITO
MARIANO NECOCHEA 0
BAHIA BLANCA
-38,7385559082031
-62,2416229248047
NaN
NaN
NaN
NaN
NaN
6356
2014-05-22 06:36:25
ATENCION MEDICA EN COMISARIAS
PTE ROCA 3000
BAHIA BLANCA
-38,72046
-62,27804
NaN
NaN
NaN
NaN
NaN
6357
2014-05-22 07:19:18
ACCIDENTE DE TRANSITO
AVDA ALEM esq. HIPOLITO YRIGOYEN
BAHIA BLANCA
-38,71361
-62,26112
NaN
NaN
NaN
NaN
NaN
6358
2014-05-22 09:09:33
ATENCION MEDICA EN DOMICILIOS
CORONEL MARTINIANO RODRIGUEZ 3000
BAHIA BLANCA
-38,71211
-62,26304
NaN
NaN
NaN
NaN
NaN
6359
2014-05-22 09:22:10
RECLAMOS POR ARBOLES CAIDOS O RAMAS
HARDING GREEN 9000
BAHIA BLANCA
-38,71972
-62,28791
NaN
NaN
NaN
NaN
NaN
6360 rows × 11 columns
In [20]:
df_other_excel[['Evento', 'Domicilio']]
Out[20]:
Evento
Domicilio
0
ATENCION MEDICA EN DOMICILIOS
NEWTON 1800
1
ACCIDENTE DE TRANSITO
TERRADA esq. 17 DE MAYO
2
ACCIDENTE DE TRANSITO
CASEROS 3300
3
ATENCION MEDICA EN VIA PUBLICA
CATAMARCA 1500
4
ACCIDENTE DE TRANSITO
DR ADRIAN MORADO VERES esq. JUJUY
5
ATENCION MEDICA EN DOMICILIOS
MATHEU 6000
6
ATENCION MEDICA EN VIA PUBLICA
CNEL VIDAL 9000
7
ATENCION MEDICA EN DOMICILIOS
MATHEU 6000
8
ATENCION MEDICA EN VIA PUBLICA
CORONEL MARTINIANO RODRIGUEZ esq. MITRE
9
ATENCION MEDICA EN VIA PUBLICA
LEOPOLDO LUGONES 0
10
ATENCION MEDICA EN VIA PUBLICA
PBRO JUAN BAUTISTA BIGIO 9000
11
ATENCION MEDICA EN VIA PUBLICA
RIO NEGRO 2400
12
ATENCION MEDICA EN VIA PUBLICA
FUERTE ARGENTINO 3000
13
ACCIDENTE DE TRANSITO
CABRERA 3900
14
ATENCION MEDICA EN DOMICILIOS
FRANCISCA HERNANDEZ 0
15
ATENCION MEDICA EN DOMICILIOS
ESTEBAN RIGAMONTI 3000
16
ATENCION MEDICA EN DOMICILIOS
RINCON 279 1 0
17
RECLAMOS POR ARBOLES CAIDOS O RAMAS
MENDOZA 0
18
ATENCION MEDICA EN DOMICILIOS
ENTRE RIOS 2100
19
ATENCION MEDICA EN VIA PUBLICA
AYACUCHO 3000
20
ATENCION MEDICA EN DOMICILIOS
TIERRA DEL FUEGO 1500
21
ATENCION MEDICA EN DOMICILIOS
GRAL NICOLAS LEVALLE 1800
22
ATENCION MEDICA EN DOMICILIOS
DON BOSCO 3000
23
ATENCION MEDICA EN VIA PUBLICA
12 DE OCTUBRE 6000
24
ATENCION MEDICA EN DOMICILIOS
GRAL PABLO RICCHIERI 2400
25
ATENCION MEDICA EN DOMICILIOS
HUAURA 3000
26
ATENCION MEDICA EN DOMICILIOS
HUMBERTO PRIMO 3000
27
ACCIDENTE DE TRANSITO
CNEL ROSALES esq. D ORBIGNY
28
ATENCION MEDICA EN DOMICILIOS
HONDURAS 2700
29
ATENCION MEDICA EN DOMICILIOS
DARREGUEIRA 6000
...
...
...
6330
RECLAMOS POR ARBOLES CAIDOS O RAMAS
AVDA ALEM 3000
6331
ATENCION MEDICA EN DOMICILIOS
SALTA 0
6332
ATENCION MEDICA EN DOMICILIOS
GRAL MANUEL BELGRANO 3600
6333
ACCIDENTE DE TRANSITO
ESTOMBA esq. CORONEL MARTIN RODRIGUEZ
6334
ACCIDENTE DE TRANSITO
TUCUMAN esq. PTE ROCA
6335
ACCIDENTE DE TRANSITO
NAPOSTA esq. HIPOLITO YRIGOYEN
6336
ACCIDENTE DE TRANSITO
FLORIDA esq. GRAL ALVEAR
6337
ATENCION MEDICA EN DOMICILIOS
PJE JERONIMO CALVENTO 1800
6338
ATENCION MEDICA EN DOMICILIOS
PAMPA CENTRAL 2100
6339
ATENCION MEDICA EN DOMICILIOS
25 DE MAYO 9000
6340
ATENCION MEDICA EN DOMICILIOS
ÑANCU 0
6341
ACCIDENTE DE TRANSITO
AVDA ALEM 6000
6342
ATENCION MEDICA EN DOMICILIOS
3 DE FEBRERO 2700
6343
ACCIDENTE DE TRANSITO
VALENTIN ALSINA esq. RUFINO ROJAS
6344
ACCIDENTE DE TRANSITO
MENDOZA esq. BOLIVIA
6345
ACCIDENTE DE TRANSITO
BELGRANO 3000
6346
ATENCION MEDICA EN DOMICILIOS
MAESTRO PICCIOLI 3600
6347
ACCIDENTE DE TRANSITO
PABLO LEJARRAGA esq. 14 DE JULIO
6348
ATENCION MEDICA EN DOMICILIOS
RIVADAVIA 2700
6349
RECLAMOS POR HUMO EN ZONA INDUSTRIAL
MAESTRO PICCIOLI 3600
6350
RECLAMOS POR BACHES EN VIA PUBLICA
12 DE OCTUBRE esq. CARONTI
6351
ATENCION MEDICA A PARTURIENTAS
CORNELIO SAAVEDRA 3000
6352
RECLAMOS POR ARBOLES CAIDOS O RAMAS
RAFAEL OBLIGADO 5100
6353
ATENCION MEDICA EN DOMICILIOS
BELGRANO 9000
6354
ATENCION MEDICA EN DOMICILIOS
MARCOS MORA 3000
6355
ACCIDENTE DE TRANSITO
MARIANO NECOCHEA 0
6356
ATENCION MEDICA EN COMISARIAS
PTE ROCA 3000
6357
ACCIDENTE DE TRANSITO
AVDA ALEM esq. HIPOLITO YRIGOYEN
6358
ATENCION MEDICA EN DOMICILIOS
CORONEL MARTINIANO RODRIGUEZ 3000
6359
RECLAMOS POR ARBOLES CAIDOS O RAMAS
HARDING GREEN 9000
6360 rows × 2 columns
In [21]:
print df_txt.columns
df_txt.columns = ['medidor', 'date', 'value']
print df_txt.columns
df_txt
df_txt['medidor'].drop_duplicates()
Int64Index([0, 1, 2], dtype='int64')
Index([u'medidor', u'date', u'value'], dtype='object')
Out[21]:
0 8
1 15
2 16
3 17
4 18
5 20
6 6
7 5
9 7
11 9
12 10
13 13
14 14
773 19
775 21
776 22
795 23
796 24
834 11
835 12
Name: medidor, dtype: int64
In [22]:
df_txt.ix[:4, :3]
Out[22]:
medidor
date
value
0
8
2013-11-28 11:00:00
0
1
15
2013-11-28 11:00:00
692
2
16
2013-11-28 11:00:00
367
3
17
2013-11-28 11:00:00
0
4
18
2013-11-28 11:00:00
0
In [23]:
df_txt['nueva_col'] = np.arange(len(df_txt['medidor']))
df_txt
Out[23]:
medidor
date
value
nueva_col
0
8
2013-11-28 11:00:00
0
0
1
15
2013-11-28 11:00:00
692
1
2
16
2013-11-28 11:00:00
367
2
3
17
2013-11-28 11:00:00
0
3
4
18
2013-11-28 11:00:00
0
4
5
20
2013-12-01 09:30:00
71
5
6
6
2013-12-05 15:45:00
0
6
7
5
2013-11-18 22:15:00
29705
7
8
6
2013-11-18 22:15:00
15364
8
9
7
2013-11-18 22:15:00
41482
9
10
8
2013-11-18 22:15:00
52487
10
11
9
2013-11-18 22:15:00
0
11
12
10
2013-11-18 22:15:00
0
12
13
13
2013-11-18 22:15:00
22792
13
14
14
2013-11-18 22:15:00
18182
14
15
15
2013-11-18 22:15:00
0
15
16
16
2013-11-18 22:15:00
0
16
17
17
2013-11-18 22:15:00
22273
17
18
18
2013-11-18 22:15:00
31488
18
19
7
2013-11-18 22:30:00
64266
19
20
8
2013-11-18 22:30:00
41479
20
21
9
2013-11-18 22:30:00
0
21
22
10
2013-11-18 22:30:00
0
22
23
13
2013-11-18 22:30:00
14344
23
24
14
2013-11-18 22:30:00
11270
24
25
15
2013-11-18 22:30:00
0
25
26
16
2013-11-18 22:30:00
0
26
27
17
2013-11-18 22:30:00
20481
27
28
18
2013-11-18 22:30:00
29696
28
29
5
2013-11-18 22:30:00
24585
29
...
...
...
...
...
129691
20
2014-02-09 12:45:00
105
129691
129692
12
2014-02-09 11:30:00
0
129692
129693
9
2014-02-09 11:30:00
0
129693
129694
10
2014-02-09 11:30:00
0
129694
129695
9
2014-02-09 11:45:00
0
129695
129696
10
2014-02-09 11:45:00
0
129696
129697
7
2014-02-09 12:00:00
0
129697
129698
8
2014-02-09 12:00:00
0
129698
129699
5
2014-02-09 12:00:00
0
129699
129700
6
2014-02-09 12:00:00
0
129700
129701
15
2014-02-09 12:00:00
598
129701
129702
16
2014-02-09 12:00:00
304
129702
129703
19
2014-02-09 12:00:00
187
129703
129704
20
2014-02-09 12:00:00
98
129704
129705
17
2014-02-09 12:00:00
0
129705
129706
18
2014-02-09 12:00:00
0
129706
129707
9
2014-02-09 12:15:00
0
129707
129708
10
2014-02-09 12:15:00
0
129708
129709
5
2014-02-09 12:30:00
0
129709
129710
6
2014-02-09 12:30:00
0
129710
129711
15
2014-02-09 12:30:00
616
129711
129712
16
2014-02-09 12:30:00
309
129712
129713
19
2014-02-09 12:30:00
199
129713
129714
20
2014-02-09 12:30:00
104
129714
129715
17
2014-02-09 12:30:00
0
129715
129716
18
2014-02-09 12:30:00
0
129716
129717
11
2014-02-09 12:30:00
0
129717
129718
12
2014-02-09 12:30:00
0
129718
129719
23
2014-02-09 12:45:00
338
129719
129720
24
2014-02-09 12:45:00
227
129720
129721 rows × 4 columns
In [24]:
df_txt.T
Out[24]:
0
1
2
3
4
5
6
7
8
9
...
129711
129712
129713
129714
129715
129716
129717
129718
129719
129720
medidor
8
15
16
17
18
20
6
5
6
7
...
15
16
19
20
17
18
11
12
23
24
date
2013-11-28 11:00:00
2013-11-28 11:00:00
2013-11-28 11:00:00
2013-11-28 11:00:00
2013-11-28 11:00:00
2013-12-01 09:30:00
2013-12-05 15:45:00
2013-11-18 22:15:00
2013-11-18 22:15:00
2013-11-18 22:15:00
...
2014-02-09 12:30:00
2014-02-09 12:30:00
2014-02-09 12:30:00
2014-02-09 12:30:00
2014-02-09 12:30:00
2014-02-09 12:30:00
2014-02-09 12:30:00
2014-02-09 12:30:00
2014-02-09 12:45:00
2014-02-09 12:45:00
value
0
692
367
0
0
71
0
29705
15364
41482
...
616
309
199
104
0
0
0
0
338
227
nueva_col
0
1
2
3
4
5
6
7
8
9
...
129711
129712
129713
129714
129715
129716
129717
129718
129719
129720
4 rows × 129721 columns
In [25]:
df_txt.values
Out[25]:
array([[8, Timestamp('2013-11-28 11:00:00'), 0, 0],
[15, Timestamp('2013-11-28 11:00:00'), 692, 1],
[16, Timestamp('2013-11-28 11:00:00'), 367, 2],
...,
[12, Timestamp('2014-02-09 12:30:00'), 0, 129718],
[23, Timestamp('2014-02-09 12:45:00'), 338, 129719],
[24, Timestamp('2014-02-09 12:45:00'), 227, 129720]], dtype=object)
In [26]:
df_txt.reindex(columns=['medidor', 'date', 'value'])
Out[26]:
medidor
date
value
0
8
2013-11-28 11:00:00
0
1
15
2013-11-28 11:00:00
692
2
16
2013-11-28 11:00:00
367
3
17
2013-11-28 11:00:00
0
4
18
2013-11-28 11:00:00
0
5
20
2013-12-01 09:30:00
71
6
6
2013-12-05 15:45:00
0
7
5
2013-11-18 22:15:00
29705
8
6
2013-11-18 22:15:00
15364
9
7
2013-11-18 22:15:00
41482
10
8
2013-11-18 22:15:00
52487
11
9
2013-11-18 22:15:00
0
12
10
2013-11-18 22:15:00
0
13
13
2013-11-18 22:15:00
22792
14
14
2013-11-18 22:15:00
18182
15
15
2013-11-18 22:15:00
0
16
16
2013-11-18 22:15:00
0
17
17
2013-11-18 22:15:00
22273
18
18
2013-11-18 22:15:00
31488
19
7
2013-11-18 22:30:00
64266
20
8
2013-11-18 22:30:00
41479
21
9
2013-11-18 22:30:00
0
22
10
2013-11-18 22:30:00
0
23
13
2013-11-18 22:30:00
14344
24
14
2013-11-18 22:30:00
11270
25
15
2013-11-18 22:30:00
0
26
16
2013-11-18 22:30:00
0
27
17
2013-11-18 22:30:00
20481
28
18
2013-11-18 22:30:00
29696
29
5
2013-11-18 22:30:00
24585
...
...
...
...
129691
20
2014-02-09 12:45:00
105
129692
12
2014-02-09 11:30:00
0
129693
9
2014-02-09 11:30:00
0
129694
10
2014-02-09 11:30:00
0
129695
9
2014-02-09 11:45:00
0
129696
10
2014-02-09 11:45:00
0
129697
7
2014-02-09 12:00:00
0
129698
8
2014-02-09 12:00:00
0
129699
5
2014-02-09 12:00:00
0
129700
6
2014-02-09 12:00:00
0
129701
15
2014-02-09 12:00:00
598
129702
16
2014-02-09 12:00:00
304
129703
19
2014-02-09 12:00:00
187
129704
20
2014-02-09 12:00:00
98
129705
17
2014-02-09 12:00:00
0
129706
18
2014-02-09 12:00:00
0
129707
9
2014-02-09 12:15:00
0
129708
10
2014-02-09 12:15:00
0
129709
5
2014-02-09 12:30:00
0
129710
6
2014-02-09 12:30:00
0
129711
15
2014-02-09 12:30:00
616
129712
16
2014-02-09 12:30:00
309
129713
19
2014-02-09 12:30:00
199
129714
20
2014-02-09 12:30:00
104
129715
17
2014-02-09 12:30:00
0
129716
18
2014-02-09 12:30:00
0
129717
11
2014-02-09 12:30:00
0
129718
12
2014-02-09 12:30:00
0
129719
23
2014-02-09 12:45:00
338
129720
24
2014-02-09 12:45:00
227
129721 rows × 3 columns
In [27]:
df_txt.drop([0, 1])
Out[27]:
medidor
date
value
nueva_col
2
16
2013-11-28 11:00:00
367
2
3
17
2013-11-28 11:00:00
0
3
4
18
2013-11-28 11:00:00
0
4
5
20
2013-12-01 09:30:00
71
5
6
6
2013-12-05 15:45:00
0
6
7
5
2013-11-18 22:15:00
29705
7
8
6
2013-11-18 22:15:00
15364
8
9
7
2013-11-18 22:15:00
41482
9
10
8
2013-11-18 22:15:00
52487
10
11
9
2013-11-18 22:15:00
0
11
12
10
2013-11-18 22:15:00
0
12
13
13
2013-11-18 22:15:00
22792
13
14
14
2013-11-18 22:15:00
18182
14
15
15
2013-11-18 22:15:00
0
15
16
16
2013-11-18 22:15:00
0
16
17
17
2013-11-18 22:15:00
22273
17
18
18
2013-11-18 22:15:00
31488
18
19
7
2013-11-18 22:30:00
64266
19
20
8
2013-11-18 22:30:00
41479
20
21
9
2013-11-18 22:30:00
0
21
22
10
2013-11-18 22:30:00
0
22
23
13
2013-11-18 22:30:00
14344
23
24
14
2013-11-18 22:30:00
11270
24
25
15
2013-11-18 22:30:00
0
25
26
16
2013-11-18 22:30:00
0
26
27
17
2013-11-18 22:30:00
20481
27
28
18
2013-11-18 22:30:00
29696
28
29
5
2013-11-18 22:30:00
24585
29
30
6
2013-11-18 22:30:00
15364
30
31
7
2013-11-18 22:45:00
60170
31
...
...
...
...
...
129691
20
2014-02-09 12:45:00
105
129691
129692
12
2014-02-09 11:30:00
0
129692
129693
9
2014-02-09 11:30:00
0
129693
129694
10
2014-02-09 11:30:00
0
129694
129695
9
2014-02-09 11:45:00
0
129695
129696
10
2014-02-09 11:45:00
0
129696
129697
7
2014-02-09 12:00:00
0
129697
129698
8
2014-02-09 12:00:00
0
129698
129699
5
2014-02-09 12:00:00
0
129699
129700
6
2014-02-09 12:00:00
0
129700
129701
15
2014-02-09 12:00:00
598
129701
129702
16
2014-02-09 12:00:00
304
129702
129703
19
2014-02-09 12:00:00
187
129703
129704
20
2014-02-09 12:00:00
98
129704
129705
17
2014-02-09 12:00:00
0
129705
129706
18
2014-02-09 12:00:00
0
129706
129707
9
2014-02-09 12:15:00
0
129707
129708
10
2014-02-09 12:15:00
0
129708
129709
5
2014-02-09 12:30:00
0
129709
129710
6
2014-02-09 12:30:00
0
129710
129711
15
2014-02-09 12:30:00
616
129711
129712
16
2014-02-09 12:30:00
309
129712
129713
19
2014-02-09 12:30:00
199
129713
129714
20
2014-02-09 12:30:00
104
129714
129715
17
2014-02-09 12:30:00
0
129715
129716
18
2014-02-09 12:30:00
0
129716
129717
11
2014-02-09 12:30:00
0
129717
129718
12
2014-02-09 12:30:00
0
129718
129719
23
2014-02-09 12:45:00
338
129719
129720
24
2014-02-09 12:45:00
227
129720
129719 rows × 4 columns
In [28]:
df_txt.drop(['value'], axis=1)
Out[28]:
medidor
date
nueva_col
0
8
2013-11-28 11:00:00
0
1
15
2013-11-28 11:00:00
1
2
16
2013-11-28 11:00:00
2
3
17
2013-11-28 11:00:00
3
4
18
2013-11-28 11:00:00
4
5
20
2013-12-01 09:30:00
5
6
6
2013-12-05 15:45:00
6
7
5
2013-11-18 22:15:00
7
8
6
2013-11-18 22:15:00
8
9
7
2013-11-18 22:15:00
9
10
8
2013-11-18 22:15:00
10
11
9
2013-11-18 22:15:00
11
12
10
2013-11-18 22:15:00
12
13
13
2013-11-18 22:15:00
13
14
14
2013-11-18 22:15:00
14
15
15
2013-11-18 22:15:00
15
16
16
2013-11-18 22:15:00
16
17
17
2013-11-18 22:15:00
17
18
18
2013-11-18 22:15:00
18
19
7
2013-11-18 22:30:00
19
20
8
2013-11-18 22:30:00
20
21
9
2013-11-18 22:30:00
21
22
10
2013-11-18 22:30:00
22
23
13
2013-11-18 22:30:00
23
24
14
2013-11-18 22:30:00
24
25
15
2013-11-18 22:30:00
25
26
16
2013-11-18 22:30:00
26
27
17
2013-11-18 22:30:00
27
28
18
2013-11-18 22:30:00
28
29
5
2013-11-18 22:30:00
29
...
...
...
...
129691
20
2014-02-09 12:45:00
129691
129692
12
2014-02-09 11:30:00
129692
129693
9
2014-02-09 11:30:00
129693
129694
10
2014-02-09 11:30:00
129694
129695
9
2014-02-09 11:45:00
129695
129696
10
2014-02-09 11:45:00
129696
129697
7
2014-02-09 12:00:00
129697
129698
8
2014-02-09 12:00:00
129698
129699
5
2014-02-09 12:00:00
129699
129700
6
2014-02-09 12:00:00
129700
129701
15
2014-02-09 12:00:00
129701
129702
16
2014-02-09 12:00:00
129702
129703
19
2014-02-09 12:00:00
129703
129704
20
2014-02-09 12:00:00
129704
129705
17
2014-02-09 12:00:00
129705
129706
18
2014-02-09 12:00:00
129706
129707
9
2014-02-09 12:15:00
129707
129708
10
2014-02-09 12:15:00
129708
129709
5
2014-02-09 12:30:00
129709
129710
6
2014-02-09 12:30:00
129710
129711
15
2014-02-09 12:30:00
129711
129712
16
2014-02-09 12:30:00
129712
129713
19
2014-02-09 12:30:00
129713
129714
20
2014-02-09 12:30:00
129714
129715
17
2014-02-09 12:30:00
129715
129716
18
2014-02-09 12:30:00
129716
129717
11
2014-02-09 12:30:00
129717
129718
12
2014-02-09 12:30:00
129718
129719
23
2014-02-09 12:45:00
129719
129720
24
2014-02-09 12:45:00
129720
129721 rows × 3 columns
In [29]:
df1 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bcd'))
df1
Out[29]:
b
c
d
0
0
1
2
1
3
4
5
2
6
7
8
3
9
10
11
In [30]:
df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=['b', 'd', 'e'])
df2
Out[30]:
b
d
e
0
0
1
2
1
3
4
5
2
6
7
8
3
9
10
11
In [31]:
suma_nan = df1 + df2
suma_nan
Out[31]:
b
c
d
e
0
0
NaN
3
NaN
1
6
NaN
9
NaN
2
12
NaN
15
NaN
3
18
NaN
21
NaN
In [32]:
df1.add(df2, fill_value=0) #cambiar el cero para ver como afecta a las funciones
Out[32]:
b
c
d
e
0
0
1
3
2
1
6
4
9
5
2
12
7
15
8
3
18
10
21
11
In [33]:
df1.div(df2, fill_value=1)
Out[33]:
b
c
d
e
0
NaN
1
2.000000
0.500000
1
1
4
1.250000
0.200000
2
1
7
1.142857
0.125000
3
1
10
1.100000
0.090909
In [34]:
df1.apply(lambda x: x + 1)
Out[34]:
b
c
d
0
1
2
3
1
4
5
6
2
7
8
9
3
10
11
12
In [35]:
def mi_incremento(x):
return x + 1
df1['b'].apply(mi_incremento)
Out[35]:
0 1
1 4
2 7
3 10
Name: b, dtype: float64
In [36]:
df1.sort_index(ascending=False, axis=1) # ver como cambia el orden cambiando axis
Out[36]:
d
c
b
0
2
1
0
1
5
4
3
2
8
7
6
3
11
10
9
In [38]:
df1['b'].sort_values()
Out[38]:
0 0
1 3
2 6
3 9
Name: b, dtype: float64
In [39]:
df1.rank(method='max', ascending=False) #method puede ser average, min, max o first
#df1
Out[39]:
b
c
d
0
4
4
4
1
3
3
3
2
2
2
2
3
1
1
1
In [40]:
df_txt.describe()
Out[40]:
medidor
value
nueva_col
count
129721.000000
129721.000000
129721.000000
mean
14.825263
7426.511374
64860.000000
std
6.074758
136264.699673
37447.371472
min
5.000000
0.000000
0.000000
25%
9.000000
0.000000
32430.000000
50%
16.000000
0.000000
64860.000000
75%
20.000000
344.000000
97290.000000
max
24.000000
6842516.000000
129720.000000
In [43]:
df_txt['value'].pct_change()
Out[43]:
0 NaN
1 inf
2 -0.469653
3 -1.000000
4 NaN
5 inf
6 -1.000000
7 inf
8 -0.482781
9 1.699948
10 0.265296
11 -1.000000
12 NaN
13 inf
14 -0.202264
15 -1.000000
16 NaN
17 inf
18 0.413730
19 1.040968
20 -0.354573
21 -1.000000
22 NaN
23 inf
24 -0.214306
25 -1.000000
26 NaN
27 inf
28 0.449929
29 -0.172111
...
129691 -0.464286
129692 -1.000000
129693 NaN
129694 NaN
129695 NaN
129696 NaN
129697 NaN
129698 NaN
129699 NaN
129700 NaN
129701 inf
129702 -0.491639
129703 -0.384868
129704 -0.475936
129705 -1.000000
129706 NaN
129707 NaN
129708 NaN
129709 NaN
129710 NaN
129711 inf
129712 -0.498377
129713 -0.355987
129714 -0.477387
129715 -1.000000
129716 NaN
129717 NaN
129718 NaN
129719 inf
129720 -0.328402
Name: value, dtype: float64
In [44]:
df_txt['value'].pct_change
Out[44]:
<bound method Series.pct_change of 0 0
1 692
2 367
3 0
4 0
5 71
6 0
7 29705
8 15364
9 41482
10 52487
11 0
12 0
13 22792
14 18182
15 0
16 0
17 22273
18 31488
19 64266
20 41479
21 0
22 0
23 14344
24 11270
25 0
26 0
27 20481
28 29696
29 24585
...
129691 105
129692 0
129693 0
129694 0
129695 0
129696 0
129697 0
129698 0
129699 0
129700 0
129701 598
129702 304
129703 187
129704 98
129705 0
129706 0
129707 0
129708 0
129709 0
129710 0
129711 616
129712 309
129713 199
129714 104
129715 0
129716 0
129717 0
129718 0
129719 338
129720 227
Name: value, dtype: int64>
In [45]:
df_txt['value'].corr(df_txt['medidor'])
Out[45]:
0.041885128928345339
In [46]:
df_txt.corr()
Out[46]:
medidor
value
nueva_col
medidor
1.000000
0.041885
-0.055407
value
0.041885
1.000000
0.011713
nueva_col
-0.055407
0.011713
1.000000
In [47]:
suma_nan.isnull()
Out[47]:
b
c
d
e
0
False
True
False
True
1
False
True
False
True
2
False
True
False
True
3
False
True
False
True
In [48]:
test_1 = suma_nan.fillna(10)
test_1
Out[48]:
b
c
d
e
0
0
10
3
10
1
6
10
9
10
2
12
10
15
10
3
18
10
21
10
In [49]:
test_2 = suma_nan.interpolate(method='linear', axis=1) #cambiar axis y ver que sucede
test_2
Out[49]:
b
c
d
e
0
0
1.5
3
3
1
6
7.5
9
9
2
12
13.5
15
15
3
18
19.5
21
21
In [ ]:
suma_nan.interpolate?
In [51]:
import scipy.stats as stats
print stats.ttest_1samp(df_txt['value'], popmean=df_txt['value'].mean())
Ttest_1sampResult(statistic=0.0, pvalue=1.0)
In [52]:
print test_1
print test_2
merged = pd.merge(test_1, test_2, on='d', how='outer')
merged
#left: use only keys from left frame (SQL: left outer join)
#right: use only keys from right frame (SQL: right outer join)
#outer: use union of keys from both frames (SQL: full outer join)
#inner: use intersection of keys from both frames (SQL: inner join)
b c d e
0 0 10 3 10
1 6 10 9 10
2 12 10 15 10
3 18 10 21 10
b c d e
0 0 1.5 3 3
1 6 7.5 9 9
2 12 13.5 15 15
3 18 19.5 21 21
Out[52]:
b_x
c_x
d
e_x
b_y
c_y
e_y
0
0
10
3
10
0
1.5
3
1
6
10
9
10
6
7.5
9
2
12
10
15
10
12
13.5
15
3
18
10
21
10
18
19.5
21
In [53]:
merged = pd.merge(test_1, test_2, on=['d', 'b', 'c', 'e'], how='inner')
merged
Out[53]:
b
c
d
e
In [54]:
merged = pd.merge(test_1, test_2, on=['d', 'b', 'c', 'e'], how='outer')
merged
Out[54]:
b
c
d
e
0
0
10.0
3
10
1
6
10.0
9
10
2
12
10.0
15
10
3
18
10.0
21
10
4
0
1.5
3
3
5
6
7.5
9
9
6
12
13.5
15
15
7
18
19.5
21
21
In [55]:
mi_serie_df = mi_serie_med.to_frame()
mi_serie_df.columns = ['date']
merged = pd.merge(df_txt, mi_serie_df, on='date', how='outer')
merged
print mi_serie_df.count()
print df_txt.count()
print merged.count()
date 37153
dtype: int64
medidor 129721
date 129721
value 129721
nueva_col 129721
dtype: int64
medidor 129721
date 159589
value 129721
nueva_col 129721
dtype: int64
In [56]:
merged_inter = merged.interpolate()
merged_inter.count()
Out[56]:
medidor 159589
date 159589
value 159589
nueva_col 159589
dtype: int64
In [57]:
pd.concat([df_txt['date'], df_other_excel['Evento']], axis=1) #notar que ocurre al cambiar el axis
Out[57]:
date
Evento
0
2013-11-28 11:00:00
ATENCION MEDICA EN DOMICILIOS
1
2013-11-28 11:00:00
ACCIDENTE DE TRANSITO
2
2013-11-28 11:00:00
ACCIDENTE DE TRANSITO
3
2013-11-28 11:00:00
ATENCION MEDICA EN VIA PUBLICA
4
2013-11-28 11:00:00
ACCIDENTE DE TRANSITO
5
2013-12-01 09:30:00
ATENCION MEDICA EN DOMICILIOS
6
2013-12-05 15:45:00
ATENCION MEDICA EN VIA PUBLICA
7
2013-11-18 22:15:00
ATENCION MEDICA EN DOMICILIOS
8
2013-11-18 22:15:00
ATENCION MEDICA EN VIA PUBLICA
9
2013-11-18 22:15:00
ATENCION MEDICA EN VIA PUBLICA
10
2013-11-18 22:15:00
ATENCION MEDICA EN VIA PUBLICA
11
2013-11-18 22:15:00
ATENCION MEDICA EN VIA PUBLICA
12
2013-11-18 22:15:00
ATENCION MEDICA EN VIA PUBLICA
13
2013-11-18 22:15:00
ACCIDENTE DE TRANSITO
14
2013-11-18 22:15:00
ATENCION MEDICA EN DOMICILIOS
15
2013-11-18 22:15:00
ATENCION MEDICA EN DOMICILIOS
16
2013-11-18 22:15:00
ATENCION MEDICA EN DOMICILIOS
17
2013-11-18 22:15:00
RECLAMOS POR ARBOLES CAIDOS O RAMAS
18
2013-11-18 22:15:00
ATENCION MEDICA EN DOMICILIOS
19
2013-11-18 22:30:00
ATENCION MEDICA EN VIA PUBLICA
20
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
21
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
22
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
23
2013-11-18 22:30:00
ATENCION MEDICA EN VIA PUBLICA
24
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
25
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
26
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
27
2013-11-18 22:30:00
ACCIDENTE DE TRANSITO
28
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
29
2013-11-18 22:30:00
ATENCION MEDICA EN DOMICILIOS
...
...
...
129691
2014-02-09 12:45:00
NaN
129692
2014-02-09 11:30:00
NaN
129693
2014-02-09 11:30:00
NaN
129694
2014-02-09 11:30:00
NaN
129695
2014-02-09 11:45:00
NaN
129696
2014-02-09 11:45:00
NaN
129697
2014-02-09 12:00:00
NaN
129698
2014-02-09 12:00:00
NaN
129699
2014-02-09 12:00:00
NaN
129700
2014-02-09 12:00:00
NaN
129701
2014-02-09 12:00:00
NaN
129702
2014-02-09 12:00:00
NaN
129703
2014-02-09 12:00:00
NaN
129704
2014-02-09 12:00:00
NaN
129705
2014-02-09 12:00:00
NaN
129706
2014-02-09 12:00:00
NaN
129707
2014-02-09 12:15:00
NaN
129708
2014-02-09 12:15:00
NaN
129709
2014-02-09 12:30:00
NaN
129710
2014-02-09 12:30:00
NaN
129711
2014-02-09 12:30:00
NaN
129712
2014-02-09 12:30:00
NaN
129713
2014-02-09 12:30:00
NaN
129714
2014-02-09 12:30:00
NaN
129715
2014-02-09 12:30:00
NaN
129716
2014-02-09 12:30:00
NaN
129717
2014-02-09 12:30:00
NaN
129718
2014-02-09 12:30:00
NaN
129719
2014-02-09 12:45:00
NaN
129720
2014-02-09 12:45:00
NaN
129721 rows × 2 columns
In [58]:
print suma_nan
print df1
suma_nan.combine_first(df1)
b c d e
0 0 NaN 3 NaN
1 6 NaN 9 NaN
2 12 NaN 15 NaN
3 18 NaN 21 NaN
b c d
0 0 1 2
1 3 4 5
2 6 7 8
3 9 10 11
Out[58]:
b
c
d
e
0
0
1
3
NaN
1
6
4
9
NaN
2
12
7
15
NaN
3
18
10
21
NaN
In [ ]:
Content source: celiacintas/dss_practica
Similar notebooks: